import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import os
import pandas_profiling
# Show every column when displaying a DataFrame (disable column truncation).
pd.set_option('display.max_columns', None)
import PyPDF2
from scipy.stats import kurtosis
from scipy.stats import skew
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Inventory the working directory: print each file name with its size in MB.
for f in os.listdir():
    print(f.ljust(30) + "--" + str(round(os.path.getsize(f) / 1000000, 2)) + 'MB')

# Read and display the dataset description shipped with the data.
# Fix: use a context manager so the handle is closed even if read() raises
# (the original opened/closed the file manually).
# NOTE(review): filename spelling/case must match the file on disk — verify.
filename = 'Data - parkinsons.names'
with open(filename, mode='r') as file:
    text = file.read()
print(text)
# Preview the accompanying PDF page by page.
# Fixes: the original never closed the file handle, and extracted page 0
# separately only to discard the result; both are folded into one loop
# inside a context manager. The legacy PyPDF2 API (PdfFileReader /
# numPages / getPage / extractText) is kept — NOTE(review): it was removed
# in PyPDF2 3.x (PdfReader / .pages / .extract_text); confirm the
# installed version before modernizing.
filename = 'LETTER G - Z.pdf'
with open(filename, 'rb') as pdf_file:
    fileReader = PyPDF2.PdfFileReader(pdf_file)
    # Total page count, then the extracted text of every page.
    print(fileReader.numPages)
    for i in range(fileReader.numPages):
        print(fileReader.getPage(i).extractText())

# Render the PDF inline in the notebook (display-only side effect).
from IPython.display import IFrame
IFrame(filename, width=600, height=900)
# Load the Parkinson's voice-measurement dataset.
# NOTE(review): the CSV file name has no extension — confirm it matches disk.
data_p = pd.read_csv('Data - Parkinsons')
print("Shape:", data_p.shape)
print("Contains 195 rows and 24 columns")

# Notebook display cells: first and last ten records.
data_p.head(10)
data_p.tail(10)

# Alias used by the rest of the analysis.
df = data_p
df.info()
# df.describe(include='all')
df.describe().T

# Target distribution: absolute counts, then percentages.
df.status.value_counts()
df.status.value_counts(1) * 100

# Move the 'status' target column to the end of the frame.
status_col = df.pop('status')
df['status'] = status_col
# Distributions of the three fundamental-frequency features (min/avg/max),
# on a shared y-axis, each labelled with its skewness.
f, axes = plt.subplots(1, 3, figsize=(15, 8), sharey='row')
freq_features = [('MDVP:Flo(Hz)', 'Min'), ('MDVP:Fo(Hz)', 'Avg'), ('MDVP:Fhi(Hz)', 'Max')]
for ax, (col, tag) in zip(axes, freq_features):
    sns.distplot(df[col], ax=ax, axlabel=col + "-" + tag + "-" + "skew:" + str(skew(df[col])))
plt.show()
# Distributions of the five jitter features, labelled with skewness.
# Fix: the original axlabels were copy-pasted from the shimmer section
# (e.g. 'MDVP:Jitter(Abs)' was labelled 'MDVP:Shimmer(dB)'); every label
# now names the column actually plotted. axes[1,2] is intentionally
# unused — there are only five jitter features.
f, axes = plt.subplots(2, 3, figsize=(15, 8))
jitter_features = ['MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP']
for ax, col in zip(axes.flat, jitter_features):
    sns.distplot(df[col], ax=ax, axlabel=col + "-" + "skew:" + str(skew(df[col])))
# Distributions of the six shimmer features, labelled with skewness.
f, axes = plt.subplots(2, 3, figsize=(15, 8))
shimmer_features = ['MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3',
                    'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA']
for ax, col in zip(axes.flat, shimmer_features):
    sns.distplot(df[col], ax=ax, axlabel=col + "-" + "skew:" + str(skew(df[col])))
# Noise-to-harmonics ratios: first on a shared y-axis...
f, axes = plt.subplots(1, 2, figsize=(15, 8), sharey='row')
for ax, col in zip(axes, ['NHR', 'HNR']):
    sns.distplot(df[col], ax=ax, axlabel=col + "-" + "skew:" + str(skew(df[col])))
plt.show()

# ...then independently scaled, since the shared scale hid the shapes.
f, axes = plt.subplots(1, 2, figsize=(15, 8))
for ax, col in zip(axes, ['NHR', 'HNR']):
    sns.distplot(df[col], ax=ax, axlabel=col + "-" + "skew:" + str(skew(df[col])))
plt.show()
# Nonlinear dynamical-complexity measures: RPDE and D2.
f, axes = plt.subplots(1, 2, figsize=(15, 8))
for ax, col in zip(axes, ['RPDE', 'D2']):
    sns.distplot(df[col], ax=ax, axlabel=col + "-" + "skew:" + str(skew(df[col])))
plt.show()

# Signal fractal scaling exponent on its own axes.
sns.distplot(df['DFA'], axlabel="DFA-" + "skew:" + str(skew(df['DFA'])))

# Nonlinear fundamental-frequency variation measures.
f, axes = plt.subplots(1, 3, figsize=(15, 8))
for ax, col in zip(axes, ['spread1', 'spread2', 'PPE']):
    sns.distplot(df[col], ax=ax, axlabel=col + "-" + "skew:" + str(skew(df[col])))
plt.show()
# Pairwise feature correlations, annotated to two decimals.
plt.figure(figsize=(15, 10))
correlations = df.corr()
sns.heatmap(correlations, annot=True, fmt='.2f')

# Pairplot coloured by the target ('name' excluded: non-numeric identifier).
g = sns.pairplot(df.drop(['name'], axis=1), hue="status", palette="husl")
# Fundamental-frequency features split by target class.
freq_cols = ['MDVP:Fo(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Fhi(Hz)']
f, axes = plt.subplots(1, 3, figsize=(15, 8))
for ax, col in zip(axes, freq_cols):
    sns.boxplot(x=df['status'], y=df[col], ax=ax)
plt.show()

# Same three features again, on a shared y-axis for direct comparison.
f, axes = plt.subplots(1, 3, figsize=(15, 8), sharey='row')
for ax, col in zip(axes, freq_cols):
    sns.boxplot(x=df['status'], y=df[col], ax=ax)
plt.show()
# Jitter features vs status (axes[1,2] left empty: only five features).
f, axes = plt.subplots(2, 3, figsize=(15, 15))
for ax, col in zip(axes.flat, ['MDVP:Jitter(%)', 'MDVP:Jitter(Abs)',
                               'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP']):
    sns.boxplot(x=df['status'], y=df[col], ax=ax)

# Shimmer features vs status.
f, axes = plt.subplots(2, 3, figsize=(15, 15))
for ax, col in zip(axes.flat, ['MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3',
                               'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA']):
    sns.boxplot(x=df['status'], y=df[col], ax=ax)
plt.show()
# Noise ratios vs status.
f, axes = plt.subplots(1, 2, figsize=(15, 8))
for ax, col in zip(axes, ['NHR', 'HNR']):
    sns.boxplot(x=df['status'], y=df[col], ax=ax)
plt.show()

# Complexity measures vs status.
f, axes = plt.subplots(1, 2, figsize=(15, 8))
for ax, col in zip(axes, ['RPDE', 'D2']):
    sns.boxplot(x=df['status'], y=df[col], ax=ax)
plt.show()

# DFA on its own axes.
sns.boxplot(x=df['status'], y=df['DFA'])

# Frequency-variation measures vs status.
f, axes = plt.subplots(1, 3, figsize=(15, 8))
for ax, col in zip(axes, ['spread1', 'spread2', 'PPE']):
    sns.boxplot(x=df['status'], y=df[col], ax=ax)
plt.show()
# Data quality check: per-column missing-value counts (both spellings).
df.isnull().sum()
df.isna().sum()

# Drop the non-numeric subject identifier before modelling.
df = df.drop("name", axis=1)

# Scale every feature into [-1, 1]; the 0/1 target stays raw.
scaler = MinMaxScaler((-1, 1))
# Fix: the original sliced `.values[:, 1:]` — a leftover from when the
# 'name' column was still in the frame. After the drop above, that slice
# silently discarded the first real feature (MDVP:Fo(Hz)). Use all
# feature columns.
X = scaler.fit_transform(df.loc[:, df.columns != 'status'].values)
y = df.loc[:, 'status'].values
X[:, 0:1]
# X = df.drop("status",axis=1)
# Y = df["status"]

# Plain random 70/30 split; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=10)
print(len(X_train)), print(len(X_test))
y_train.shape, y_test.shape
y_test[0:5]
from sklearn import metrics
def draw_cm(actual, predicted):
    """Plot the 2x2 confusion matrix for the binary 'status' target.

    Parameters
    ----------
    actual : array-like of 0/1 ground-truth labels.
    predicted : array-like of 0/1 predicted labels.
    """
    # Fix: `labels` is keyword-only in scikit-learn >= 1.0; the original
    # passed [0, 1] positionally, which raises TypeError there.
    cm = metrics.confusion_matrix(actual, predicted, labels=[0, 1])
    sns.heatmap(cm, annot=True, fmt='.0f',
                xticklabels=["Status 0", "Status 1"],
                yticklabels=["Status 0", "Status 1"])
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
# ---------- Logistic Regression (default 0.5 threshold) ----------
from sklearn.linear_model import LogisticRegression
logRegModel = LogisticRegression()
logRegModel.fit(X_train, y_train)
y_predict = logRegModel.predict(X_test)

from sklearn.metrics import accuracy_score,confusion_matrix,recall_score,f1_score,precision_score,roc_curve,log_loss,auc
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)

# Model-comparison table; every later section appends a row to it.
# Fix: dropped the original's `modelComp = pd.DataFrame()` dead store —
# this assignment replaced it unconditionally.
modelComp = pd.DataFrame({'Model': ['Logistic Regression - 0.5'],
                          'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                          'Precission': [precision_score(y_test, y_predict) * 100],
                          'Recall': [recall_score(y_test, y_predict) * 100]})
# Re-score the logistic model at a 0.4 probability threshold (trades
# precision for recall on the positive class).
from sklearn.preprocessing import binarize
# Fix: `threshold` is keyword-only in scikit-learn >= 1.2; the original
# passed 0.4 positionally. The outer list makes the 1-D probability
# vector 2-D, as binarize requires.
y_pred_class = binarize([logRegModel.predict_proba(X_test)[:, 1]], threshold=0.4)[0]
print('Accuracy score:', accuracy_score(y_test, y_pred_class))
print('confuion matrix:\n', confusion_matrix(y_test, y_pred_class))
print('Recall Score: ', recall_score(y_test, y_pred_class))
print('Precission Score: ', precision_score(y_test, y_pred_class))
print('F1 Score: ', f1_score(y_test, y_pred_class))
draw_cm(y_test, y_pred_class)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Logistic Regression - 0.4'],
                                     'Accuracy': [accuracy_score(y_test, y_pred_class) * 100],
                                     'Precission': [precision_score(y_test, y_pred_class) * 100],
                                     'Recall': [recall_score(y_test, y_pred_class) * 100]})],
                      ignore_index=True)
# ---------- K-Nearest Neighbours (k = 3) ----------
from sklearn.neighbors import KNeighborsClassifier
KnnModel = KNeighborsClassifier(n_neighbors=3)
KnnModel.fit(X_train, y_train)
y_predict = KnnModel.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['KNN - 3 Neigbours'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)
# ---------- Gaussian Naive Bayes ----------
from sklearn.naive_bayes import GaussianNB,BernoulliNB
NBGauModel = GaussianNB()
NBGauModel.fit(X_train, y_train)
y_predict = NBGauModel.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Naive Bayes - Gaussian'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)
# ---------- Support Vector Classifier (linear kernel) ----------
from sklearn.svm import SVC
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['SVC'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)
# Comparison of accuracy, precision and recall so far.
modelComp
# ---------- Stacking meta-classifier: KNN, NB, LR, SVC -> LogisticRegression ----------
from sklearn.ensemble import StackingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Base learners (commented-out duplicate definitions removed).
estimators = [('knn_clf', KNeighborsClassifier(n_neighbors=3)),
              ('nb_clf', GaussianNB()),
              ('lr_clf', LogisticRegression()),
              ('svc_clf', SVC(kernel='linear'))]
lr = LogisticRegression(max_iter=10000)  # meta (final) estimator
sclf = StackingClassifier(estimators=estimators, final_estimator=lr)
sclf.fit(X_train, y_train)
y_predict = sclf.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Stacking Classifier(KNN,NB,LR,SVC)LR'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)

# ROC curve of the stacked model; fpr/tpr are reused as the baseline
# curve by every later ROC plot.
y_pred_proba = sclf.predict_proba(X_test)[:, 1]
fpr, tpr, thr = roc_curve(y_test, y_pred_proba)
plt.figure()
plt.plot(fpr, tpr, color='coral', label='ROC curve Stacking - LogReg(area = %0.3f)' % auc(fpr, tpr))
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - specificity)', fontsize=14)
plt.ylabel('True Positive Rate (recall)', fontsize=14)
plt.title('Receiver operating characteristic (ROC) curve')
plt.legend(loc="lower right")
plt.show()
modelComp
# ---------- Stacking: same base learners, SVC as the final estimator ----------
estimators = [('knn_clf', KNeighborsClassifier(n_neighbors=3)),
              ('nb_clf', GaussianNB()),
              ('lr_clf', LogisticRegression()),
              ('svc_clf', SVC(kernel='linear'))]
# Fix: dropped the original's unused `lr = LogisticRegression()` dead
# store — the final estimator below is an SVC.
sclf = StackingClassifier(estimators=estimators, final_estimator=SVC(kernel='linear'))
sclf.fit(X_train, y_train)
y_predict = sclf.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Stacking Classifier(KNN,NB,LR,SVC)SVC'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)
modelComp
# ---------- Stacking: same base learners, GaussianNB as the final estimator ----------
estimators = [('knn_clf', KNeighborsClassifier(n_neighbors=3)),
              ('nb_clf', GaussianNB()),
              ('lr_clf', LogisticRegression()),
              ('svc_clf', SVC(kernel='linear'))]
# Fix: dropped the original's unused `lr = LogisticRegression()` dead
# store — the final estimator below is a GaussianNB.
sclf = StackingClassifier(estimators=estimators, final_estimator=GaussianNB())
sclf.fit(X_train, y_train)
y_predict = sclf.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Stacking Classifier(KNN,NB,LR,SVC)NB'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)
modelComp
# ---------- Decision Tree (entropy, depth <= 5, >= 5 samples per leaf) ----------
from sklearn.tree import DecisionTreeClassifier
dt_model = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=10, min_samples_leaf=5)
dt_model.fit(X_train, y_train)
y_predict = dt_model.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Decision Tree'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)

# ROC against the stacking baseline; fpr0/tpr0 are reused by later plots.
y_pred_proba = dt_model.predict_proba(X_test)[:, 1]
fpr0, tpr0, thr0 = roc_curve(y_test, y_pred_proba)
plt.figure()
for xs, ys, name in [(fpr, tpr, 'ROC curve Stacking - LogReg'),
                     (fpr0, tpr0, 'ROC curve DecissionTree')]:
    plt.plot(xs, ys, label=name + '(area = %0.3f)' % auc(xs, ys))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - specificity)', fontsize=14)
plt.ylabel('True Positive Rate (recall)', fontsize=14)
plt.title('Receiver operating characteristic (ROC) curve')
plt.legend(loc="lower right")
plt.show()
modelComp
# ---------- Bagging over the tuned decision tree (50 trees, 70% samples each) ----------
from sklearn.ensemble import BaggingClassifier
# Fix: the `base_estimator` keyword was renamed `estimator` in
# scikit-learn 1.2 and removed in 1.4.
bgclf = BaggingClassifier(estimator=dt_model, n_estimators=50, max_samples=.7)
bgclf = bgclf.fit(X_train, y_train)
y_predict = bgclf.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['BaggingClassifier'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)

# ROC against the earlier models; fpr1/tpr1 are reused by later plots.
y_pred_proba = bgclf.predict_proba(X_test)[:, 1]
fpr1, tpr1, thr1 = roc_curve(y_test, y_pred_proba)
plt.figure()
for xs, ys, name in [(fpr, tpr, 'ROC curve Stacking - LogReg'),
                     (fpr0, tpr0, 'ROC curve DecissionTree'),
                     (fpr1, tpr1, 'ROC curve Bagging - DT')]:
    plt.plot(xs, ys, label=name + '(area = %0.3f)' % auc(xs, ys))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - specificity)', fontsize=14)
plt.ylabel('True Positive Rate (recall)', fontsize=14)
plt.title('Receiver operating characteristic (ROC) curve')
plt.legend(loc="lower right")
plt.show()
modelComp
# ---------- Random Forest (50 trees) ----------
from sklearn.ensemble import RandomForestClassifier
rfclf = RandomForestClassifier(n_estimators=50)
rfclf.fit(X_train, y_train)
y_predict = rfclf.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Random Forest'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)

# ROC against the earlier models; fpr2/tpr2 are reused by later plots.
y_pred_proba = rfclf.predict_proba(X_test)[:, 1]
fpr2, tpr2, thr2 = roc_curve(y_test, y_pred_proba)
plt.figure()
for xs, ys, name in [(fpr, tpr, 'ROC curve Stacking - LogReg'),
                     (fpr0, tpr0, 'ROC curve DecissionTree'),
                     (fpr1, tpr1, 'ROC curve Bagging - DT'),
                     (fpr2, tpr2, 'ROC curve RandomForest')]:
    plt.plot(xs, ys, label=name + '(area = %0.3f)' % auc(xs, ys))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - specificity)', fontsize=14)
plt.ylabel('True Positive Rate (recall)', fontsize=14)
plt.title('Receiver operating characteristic (ROC) curve')
plt.legend(loc="lower right")
plt.show()
modelComp
# ---------- AdaBoost (20 estimators) ----------
from sklearn.ensemble import AdaBoostClassifier
abcl = AdaBoostClassifier(n_estimators=20)
abcl.fit(X_train, y_train)
y_predict = abcl.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['Ada Boost'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)

# ROC against all earlier models; fpr3_0/tpr3_0 are reused by the final plot.
y_pred_proba = abcl.predict_proba(X_test)[:, 1]
fpr3_0, tpr3_0, thr3_0 = roc_curve(y_test, y_pred_proba)
plt.figure(figsize=(8, 8))
for xs, ys, name in [(fpr, tpr, 'ROC curve Stacking - LogReg'),
                     (fpr0, tpr0, 'ROC curve DecissionTree'),
                     (fpr1, tpr1, 'ROC curve Bagging - DT'),
                     (fpr2, tpr2, 'ROC curve RandomForest'),
                     (fpr3_0, tpr3_0, 'ROC curve AdaBost')]:
    plt.plot(xs, ys, label=name + '(area = %0.3f)' % auc(xs, ys))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - specificity)', fontsize=14)
plt.ylabel('True Positive Rate (recall)', fontsize=14)
plt.title('Receiver operating characteristic (ROC) curve')
plt.legend(loc="lower right")
plt.show()
modelComp
# ---------- Gradient Boosting (1000 shallow stages, small learning rate) ----------
from sklearn.ensemble import GradientBoostingClassifier
gbcl = GradientBoostingClassifier(n_estimators=1000, learning_rate=0.001)
gbcl = gbcl.fit(X_train, y_train)
y_predict = gbcl.predict(X_test)
print('Accuracy score:', accuracy_score(y_test, y_predict))
print('confuion matrix:\n', confusion_matrix(y_test, y_predict))
print('Recall Score: ', recall_score(y_test, y_predict))
print('Precission Score: ', precision_score(y_test, y_predict))
print('F1 Score: ', f1_score(y_test, y_predict))
draw_cm(y_test, y_predict)
# Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
modelComp = pd.concat([modelComp,
                       pd.DataFrame({'Model': ['GradientBoostingClassifier'],
                                     'Accuracy': [accuracy_score(y_test, y_predict) * 100],
                                     'Precission': [precision_score(y_test, y_predict) * 100],
                                     'Recall': [recall_score(y_test, y_predict) * 100]})],
                      ignore_index=True)

# Final ROC comparison across every model trained above.
y_pred_proba = gbcl.predict_proba(X_test)[:, 1]
fpr3, tpr3, thr3 = roc_curve(y_test, y_pred_proba)
plt.figure(figsize=(8, 4))
for xs, ys, name in [(fpr, tpr, 'ROC curve Stacking - LogReg'),
                     (fpr0, tpr0, 'ROC curve DecissionTree'),
                     (fpr1, tpr1, 'ROC curve Bagging - DT'),
                     (fpr2, tpr2, 'ROC curve RandomForest'),
                     (fpr3_0, tpr3_0, 'ROC curve AdaBost'),
                     (fpr3, tpr3, 'ROC curve GradientBoost')]:
    plt.plot(xs, ys, label=name + '(area = %0.3f)' % auc(xs, ys))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - specificity)', fontsize=14)
plt.ylabel('True Positive Rate (recall)', fontsize=14)
plt.title('Receiver operating characteristic (ROC) curve')
plt.legend(loc="lower right")
plt.show()
modelComp